import time
import argparse
import pandas as pd
import os
from crawl_function import crawl_pdf
def parse_args():
    """Parse command-line arguments for this crawler instance.

    Returns:
        argparse.Namespace: parsed arguments with a single attribute
        ``count`` (int, default 1) that selects which announcement
        table (shard) this crawler process should handle.
    """
    parser = argparse.ArgumentParser(description="加入分表参数")
    # type=int so args.count is an int whether or not --count was passed;
    # previously a supplied value arrived as str and required int() casts
    # at every use site.
    parser.add_argument('--count', type=int, default=1,
                        help="用于区分不同表格的爬虫")
    args = parser.parse_args()
    return args


if __name__=="__main__":
    # Entry point: select one merged-announcement CSV (chosen by --count)
    # and crawl the PDF for every announcement row it contains.
    detail_url = 'http://www.cninfo.com.cn/new/announcement/bulletin_detail'
    # Stock-code list. NOTE(review): loaded but never used below except in
    # the commented-out loop at the bottom — confirm whether the read (and
    # its failure-if-missing side effect) is still wanted.
    stocklist = pd.read_csv(r'.\公告文件\股票代码.csv')
    args=parse_args()
    # 5-wide shard bounds derived from --count (count=1 -> [0, 5)).
    # NOTE(review): st/end are computed but never used — presumably meant
    # for slicing files[st:end]; verify the intended sharding.
    st=int(args.count)*5-5
    end=int(args.count)*5
    """初次合并时使用"""  # bare-string note: "used for the initial merge only"
    #merge_annoncement()
    # List every merged-announcement CSV under ./公告文件/合并公告信息
    # and prefix each entry with its directory path.
    files=os.listdir('.//公告文件//合并公告信息')
    files=['.//公告文件//合并公告信息//'+file for file in files]
    # Pick the single file indexed by --count. NOTE(review): with the
    # default count=1 this skips files[0] — confirm the indexing is
    # intentional (os.listdir order is also filesystem-dependent).
    data=pd.read_csv(files[int(args.count)])
    # Crawl one PDF per row; assumes the CSV has ID/code/title columns —
    # TODO confirm against the merged-announcement file schema.
    data.apply(lambda data:crawl_pdf(data['ID'],data['code'],data['title'],detail_url,args.count),axis=1)
    time.sleep(0.1)
    # Report which file finished ("已完成爬取" = "crawl finished").
    print(files[int(args.count)]+'\n'+'已完成爬取')
    # for line in range(len(stocklist)):
    #     code = str(stocklist.loc[line, 'code']).zfill(6)
    #     orgid = str(stocklist.loc[line, 'orgId'])
    #     stock = code + ',' + orgid